{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Intel® Neural Compressor Sample for Tensorflow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction\n",
    "\n",
    "This is a demo to show how to use Intel® Neural Compressor to do quantization on ResNet."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "!conda install python==3.10 -y\n",
    "!{sys.executable} -m pip install -r requirements.txt \n",
    "\n",
    "!wget -nc https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(sys.executable)\n",
    "!{sys.executable} -m pip list\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import datasets\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Dataloader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# login to huggingface to download the imagenet-1k dataset\n",
    "# replace the placeholder token below with your own read-only token, which you can create at https://huggingface.co/settings/tokens\n",
    "from huggingface_hub.hf_api import HfFolder\n",
    "HfFolder.save_token('hf_xxxxxxxxxxxxxxxxxxxxxx')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "# loading the dataset in streaming mode returns an IterableDataset\n",
    "calib_dataset = load_dataset('imagenet-1k', split='train', streaming=True, token=True)\n",
    "eval_dataset = load_dataset('imagenet-1k', split='validation', streaming=True, token=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We can select only a subset of the dataset for demo, here just select 1k samples\n",
    "MAX_SAMPLE_LENGTH=1000\n",
    "def sample_data(dataset, max_sample_length):\n",
    "    '''Collect the first `max_sample_length` records of `dataset` into an in-memory datasets.Dataset.\n",
    "\n",
    "    Only the 'image' and 'label' fields of each record are kept.\n",
    "    '''\n",
    "    data = {\"image\": [], \"label\": []}\n",
    "    for i, record in enumerate(dataset):\n",
    "        # honor the caller-supplied limit instead of silently reading the module-level constant\n",
    "        if i >= max_sample_length:\n",
    "            break\n",
    "        data[\"image\"].append(record['image'])\n",
    "        data[\"label\"].append(record['label'])\n",
    "    return datasets.Dataset.from_dict(data)\n",
    "\n",
    "sub_calib_dataset = sample_data(calib_dataset, MAX_SAMPLE_LENGTH)\n",
    "sub_eval_dataset = sample_data(eval_dataset, MAX_SAMPLE_LENGTH)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from neural_compressor.data.transforms.imagenet_transform import TensorflowResizeCropImagenetTransform\n",
    "height = width = 224\n",
    "transform = TensorflowResizeCropImagenetTransform(height, width)\n",
    "\n",
    "class CustomDataloader:\n",
    "    '''Group the records of a dataset into (images, labels) numpy batches.'''\n",
    "    def __init__(self, dataset, batch_size=1):\n",
    "        '''dataset must support len() and iteration over records holding 'image' and 'label'.'''\n",
    "        self.dataset = dataset\n",
    "        self.batch_size = batch_size\n",
    "        import math\n",
    "        # upper bound on the number of batches: records skipped in __iter__ can lower the real count\n",
    "        self.length = math.ceil(len(self.dataset) / self.batch_size)\n",
    "\n",
    "    def __iter__(self):\n",
    "        '''Yield (batch_inputs, labels) arrays; the last batch may hold fewer than batch_size samples.'''\n",
    "        batch_inputs = []\n",
    "        labels = []\n",
    "        for record in self.dataset:\n",
    "            # record e.g.: {'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=408x500 ...>, 'label': 91}\n",
    "            img = record['image']\n",
    "            label = record['label']\n",
    "            # skip the wrong shapes: anything that is not a 3-dimensional array with 3 channels last\n",
    "            img_shape = np.array(img).shape\n",
    "            if len(img_shape) != 3 or img_shape[-1] != 3:\n",
    "                continue\n",
    "            img_resized = transform((img, label))   # (img, label)\n",
    "            batch_inputs.append(np.array(img_resized[0]))\n",
    "            labels.append(label)\n",
    "            # count accepted samples, not the dataset position, so skipped records cannot misalign batches\n",
    "            if len(batch_inputs) == self.batch_size:\n",
    "                yield np.array(batch_inputs), np.array(labels)   # (bs, 224, 224, 3), (bs,)\n",
    "                batch_inputs = []\n",
    "                labels = []\n",
    "        # emit the leftover samples as a final smaller batch instead of dropping them\n",
    "        if batch_inputs:\n",
    "            yield np.array(batch_inputs), np.array(labels)\n",
    "\n",
    "    def __len__(self):\n",
    "        return self.length\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "calib_dataloader = CustomDataloader(dataset=sub_calib_dataset, batch_size=32)\n",
    "eval_dataloader = CustomDataloader(dataset=sub_eval_dataset, batch_size=32)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Quantization\n",
    "\n",
    "Next we move on to the core quantization logic. `quantization.fit` is the main entry point for converting our base model into a quantized model. We pass it the prepared calibration dataloader and an evaluation function that measures accuracy on the evaluation dataloader. After the conversion, we obtain the quantized int8 model and save it locally."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "import time\n",
    "from neural_compressor import quantization\n",
    "from neural_compressor.config import PostTrainingQuantConfig\n",
    "\n",
    "conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], excluded_precisions = ['bf16'])\n",
    "\n",
    "def eval_func(model):\n",
    "    '''Return the top-1 accuracy of `model` measured over `eval_dataloader`.\n",
    "\n",
    "    The model is wrapped with neural_compressor.model.Model and executed batch by batch through\n",
    "    its session. The time spent in each session run is accumulated and reported per sample.\n",
    "    '''\n",
    "    from neural_compressor.model import Model\n",
    "    model = Model(model)\n",
    "    total_cnt = 0\n",
    "    total_hit = 0\n",
    "    total_time = 0.0\n",
    "    for batch_inputs, labels in tqdm(eval_dataloader):\n",
    "        feed_dict = dict(zip(model.input_tensor, [batch_inputs]))\n",
    "        start = time.time()\n",
    "        preds = model.sess.run(model.output_tensor, feed_dict)\n",
    "        total_time += time.time() - start\n",
    "        ans = np.argmax(preds[0], axis=-1)\n",
    "        # label shift: build a new array so the batch handed out by the dataloader is not modified in place\n",
    "        # NOTE(review): presumably the model reserves class 0 for background - confirm\n",
    "        shifted_labels = labels + 1\n",
    "        total_cnt += len(shifted_labels)\n",
    "        total_hit += np.sum(ans == shifted_labels)\n",
    "    acc = total_hit / total_cnt\n",
    "    # divide by the number of evaluated samples so uneven batch sizes do not skew the figure\n",
    "    latency = total_time / total_cnt\n",
    "    print(f'Accuracy: {acc:.4f}, latency: {latency:.6f} second/sample')\n",
    "    return acc\n",
    "\n",
    "q_model = quantization.fit(\"./resnet50_fp32_pretrained_model.pb\", conf=conf, calib_dataloader=calib_dataloader, eval_func=eval_func)\n",
    "q_model.save(\"resnet50_int8.pb\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Benchmark\n",
    "\n",
    "Now we can see that we have two models under the current directory: the original fp32 model `resnet50_fp32_pretrained_model.pb` and the quantized int8 model `resnet50_int8.pb`, and then we are going to do performance comparisons between them.\n",
    "\n",
    "\n",
    "To avoid conflicts between the Jupyter notebook kernel and our benchmark process, we provide a separate script, `resnet_benchmark.py`, and run it directly to do the benchmarks."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FP32 benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!{sys.executable} resnet_benchmark.py --input_model resnet50_fp32_pretrained_model.pb 2>&1|tee fp32_benchmark.log\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### INT8 benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!{sys.executable} resnet_benchmark.py --input_model resnet50_int8.pb 2>&1|tee int8_benchmark.log\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, you will see the performance results in the logs, like the following:\n",
    "\n",
    "* fp32_benchmark.log\n",
    "\n",
    "```\n",
    "2023-08-28 22:46:39 [INFO] ********************************************\n",
    "2023-08-28 22:46:39 [INFO] |****Multiple Instance Benchmark Summary*****|\n",
    "2023-08-28 22:46:39 [INFO] +---------------------------------+----------+\n",
    "2023-08-28 22:46:39 [INFO] |              Items              |  Result  |\n",
    "2023-08-28 22:46:39 [INFO] +---------------------------------+----------+\n",
    "2023-08-28 22:46:39 [INFO] | Latency average [second/sample] | 0.027209 |\n",
    "2023-08-28 22:46:39 [INFO] | Throughput sum [samples/second] |  36.753  |\n",
    "2023-08-28 22:46:39 [INFO] +---------------------------------+----------+\n",
    "```\n",
    "\n",
    "* int8_benchmark.log\n",
    "\n",
    "```\n",
    "2023-08-28 22:48:35 [INFO] ********************************************\n",
    "2023-08-28 22:48:35 [INFO] |****Multiple Instance Benchmark Summary*****|\n",
    "2023-08-28 22:48:35 [INFO] +---------------------------------+----------+\n",
    "2023-08-28 22:48:35 [INFO] |              Items              |  Result  |\n",
    "2023-08-28 22:48:35 [INFO] +---------------------------------+----------+\n",
    "2023-08-28 22:48:35 [INFO] | Latency average [second/sample] | 0.006855 |\n",
    "2023-08-28 22:48:35 [INFO] | Throughput sum [samples/second] | 145.874  |\n",
    "2023-08-28 22:48:35 [INFO] +---------------------------------+----------+\n",
    "```\n",
    "\n",
    "As shown in the logs, the int8/fp32 performance gain is about 145.87/36.75 = 3.97x"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "spycsh-neuralchat",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
